import re
import time
from urllib.parse import quote, urlparse

import requests
from bs4 import BeautifulSoup
from lxml import etree
from selenium import webdriver

# HTTP request headers meant to disguise the client as a regular browser.
# NOTE(review): defined but never used in the visible code — all fetching
# goes through Selenium, not `requests`; confirm before deleting.
headersParameters = {
    'Connection': 'Keep-Alive',
    'Accept': 'text/html, application/xhtml+xml, */*',
    'Accept-Language': 'en-US,en;q=0.8,zh-Hans-CN;q=0.5,zh-Hans;q=0.3',
    'Accept-Encoding': 'gzip, deflate',
    'User-Agent': 'Mozilla/6.1 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0) like Gecko'
}
# Drive one Chrome instance through a list of search keywords on so.m.sm.cn,
# click through several result pages per keyword, then print each result title
# (the <span> text inside every <div class="title-text"> element).
browser = webdriver.Chrome()
keywords = ['江苏百瑞赢证券咨询有限公司', '信易赢', '江苏百瑞赢骗子', '百瑞赢诈骗', '百瑞赢骗局', '信易赢骗局', '百瑞赢退费', '百瑞赢', '江苏百瑞赢', '百瑞赢证券咨询', '百瑞赢骗子']
try:
    for ks in keywords:
        # Percent-encode the keyword: raw CJK characters are not valid in a URL.
        url = 'https://so.m.sm.cn/s?q=' + quote(ks)
        browser.get(url)
        # Click "next page" four times, pausing 2s each time so results can load.
        for page_label in ('第一页', '第二页', '第三页', '第四页'):
            print(page_label)
            time.sleep(2)
            pager = browser.find_element_by_id('pager')
            pager.click()
        data = browser.page_source
        soup = BeautifulSoup(data, 'html.parser')
        # Result titles are rendered as <div class="title-text"><span>…</span></div>.
        for title_div in soup.find_all(name='div', attrs={"class": "title-text"}):
            try:
                print(title_div.find('span').get_text())
            except AttributeError as e:
                # find('span') returned None — a title block without a <span>.
                print(str(e))
finally:
    # BUG FIX: the original called browser.close() inside the loop, destroying
    # the WebDriver session after the first keyword so every later iteration
    # failed. Quit exactly once, after all keywords, even on error.
    browser.quit()